######################
#  Replication code for 'Mediating the Electoral Connection', forthcoming in the JOP
#  John Henderson and John Brooks
#  12/7/2015    
######################

#### Estimates IRT ideal points and W-Nominate scores for 78:111 Congresses 
# run remotely on neyman

# MCMCpack here

rm(list=ls())      
set.seed(1005)     

if(length(which(installed.packages()[,1]=='matrixStats'))!=1){
	install.packages('matrixStats')                        
}                  
if(length(which(installed.packages()[,1]=='foreign'))!=1){
	install.packages('foreign')                        
}   
if(length(which(installed.packages()[,1]=='MCMCpack'))!=1){
	install.packages('MCMCpack')                        
}             
if(length(which(installed.packages()[,1]=='stringr'))!=1){
	install.packages('stringr')                        
}
library(matrixStats)
library(foreign)
library(MCMCpack)
library(stringr)                      
    
prFun=function(alpha,beta,theta,probit=T){
	if(probit==T){
		pr=pnorm(beta*theta-alpha)
	} else if(probit==F){
		pr=(exp(beta*theta-alpha))/(1+exp(beta*theta-alpha)) 	
	}      	 
	return(pr)
}

x=rnorm(mean=1.298,sd=.531,n=1000)  # 1.298 is the expected shift from our IV estimates 
x[which(x<=sort(x)[10])]=sample(x[which(x>sort(x)[10] & x<sort(x,decreasing=T)[10])],replace=T,size=10)
x[which(x>=sort(x,decreasing=T)[10])]=sample(x[which(x>sort(x)[10] & x<sort(x,decreasing=T)[10])],replace=T,size=10)

s1=(x/100)*1.5 # .75 #[on the scale of the expected estimated effect per expectation of .5 to .8 in above avg.]
s2=(x/100)*3 # 1.5 #[on the scale of the estimated effect per 1 in rain, close to the max in any given year]
s3=(x/100)*5 # 2.5  
ss1=mean(s1)
ss2=mean(s2)
ss3=mean(s3)

##########################
#  HOUSE  MODELS
##########################
path="~/Dropbox/rainReplication/simulations/simruns/ideal"
load(paste(path,"RollData.Rdata",sep=''))		
load(paste(path,"constraints.Rdata",sep=''))		

############
# ideal

ideal=list()    
cutpoint=list()   
rollD=rollData  

bs1=bs2=bs3=bm1=bm2=bm3=ms1=ms2=ms3=mm1=mm2=mm3=list()

theta.constraints=list()
theta.constraints[[1]]=list(KLEIN="-",GWYNNE="+")
theta.constraints[[2]]=list(POWELL="-",GWYNNE="+")
theta.constraints[[3]]=list(POWELL="-",GWYNNE="+")
theta.constraints[[4]]=list(POWELL="-",GROSS="+")
theta.constraints[[5]]=list(POWELL="-",GROSS="+")
theta.constraints[[6]]=list(POWELL="-",GROSS="+")
theta.constraints[[7]]=list(POWELL="-",GROSS="+")
theta.constraints[[8]]=list(POWELL="-",GROSS="+")
theta.constraints[[9]]=list(POWELL="-",GROSS="+")
theta.constraints[[10]]=list(POWELL="-",GROSS="+")
theta.constraints[[11]]=list(POWELL="-",GROSS="+")
theta.constraints[[12]]=list(CONYERS="-",GROSS="+")
theta.constraints[[13]]=list(CONYERS="-",GROSS="+")
theta.constraints[[14]]=list(CONYERS="-",GROSS="+")
theta.constraints[[15]]=list(CONYERS="-",GROSS="+")
theta.constraints[[16]]=list(CONYERS="-",GROSS="+")
theta.constraints[[17]]=list(CONYERS="-",HYDE="+")
theta.constraints[[18]]=list(CONYERS="-",HYDE="+")
theta.constraints[[19]]=list(CONYERS="-",HYDE="+")
theta.constraints[[20]]=list(CONYERS="-",HYDE="+")
theta.constraints[[21]]=list(CONYERS="-",HYDE="+")
theta.constraints[[22]]=list(CONYERS="-",HYDE="+")
theta.constraints[[23]]=list(CONYERS="-",HYDE="+")
theta.constraints[[24]]=list(CONYERS="-",HYDE="+")
theta.constraints[[25]]=list(CONYERS="-",HYDE="+")
theta.constraints[[26]]=list(CONYERS="-",HYDE="+")
theta.constraints[[27]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[28]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[29]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[30]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[31]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[32]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[33]]=list(CONYERS="-",SHADEGG="+")
theta.constraints[[34]]=list(CONYERS="-",SHADEGG="+")

base=77
sets=c((78+9+8+9):(78+9+8+9+7))  

for(k in sets){
	
rollD[[k]]=rollD[[k]][which(rollD[[k]][,7]!="LCV        "),]
rollD[[k]]=rollD[[k]][-c(1),]
rolls=as.data.frame(rollD[[k]][,8:ncol(rollD[[k]])])

if(length(which(str_length(names(rolls))>=6))>0){
	rolls=rolls[,-c(which(str_length(names(rolls))>=6))]
}

rownames(rolls)=rollD[[k]][,2] # icpsr ID as the roll call name, cand link this back to state and name if needed   [-c(1,nrow(rollData[[k]])),7]
rownames(rolls)[which(rownames(rolls)==constraints[k-base,1])]=as.matrix(constraints[k-base,2])
rownames(rolls)[which(rownames(rolls)==constraints[k-base,3])]=as.matrix(constraints[k-base,4])
	
for(j in 1:ncol(rolls)){
	indx1=which(rolls[,j]==0 | rolls[,j]==7 | rolls[,j]==8 | rolls[,j]==9)
	if(length(indx1)>0){
		rolls[indx1,j]=NA
	}
	indx2=which(rolls[,j]>3)
	if(length(indx2)>0){
		rolls[indx2,j]=0
	}
	indx3=which(rolls[,j]>1)
	if(length(indx3)>0){
		rolls[indx3,j]=1
	}
	rm(indx1,indx2,indx3)
}


posterior2 <- MCMCirt1d(rolls,theta.constraints=theta.constraints[[k-base]], burnin=1000, mcmc=10000, thin=10, verbose=500,store.item = TRUE)

alpha=(posterior2[,which(str_sub(colnames(posterior2),1,4)=='alph')])
a=as.numeric(gsub(colnames(posterior2)[which(str_sub(colnames(posterior2),1,4)=='alph')],pattern="[.a-zA-Z]",replace=''))

beta=(posterior2[,which(str_sub(colnames(posterior2),1,4)=='beta')])
b=as.numeric(gsub(colnames(posterior2)[which(str_sub(colnames(posterior2),1,4)=='beta')],pattern="[.a-zA-Z]",replace=''))

theta=(posterior2[,which(str_sub(colnames(posterior2),1,4)=='thet')])     
th=1:nrow(rolls)[1]                   

if(a[1]>1){
	ix=which(a[1]==as.numeric(gsub(colnames(rolls),pattern="[.a-zA-Z]",replace='')))   
	a=(a-as.numeric(gsub(colnames(rolls),pattern="[.a-zA-Z]",replace=''))[ix]+1)
}              
          
ysub=as.matrix(rolls[,a])
prMatM1=prMatM2=prMatM3=prMatS1=prMatS2=prMatS3=list()
billMatM1=billMatM2=billMatM3=billMatS1=billMatS2=billMatS3=list()      
p=nrow(posterior2)      
    
J=dim(rolls)[2]           
           

set.seed(1005)
for(h in 1:p){
	prMatM1[[h]]=prMatM2[[h]]=prMatM3[[h]]=prMatS1[[h]]=prMatS2[[h]]=prMatS3[[h]]=array(NA,dim(ysub)[1])
	billMatM1[[h]]=billMatM2[[h]]=billMatM3[[h]]=billMatS1[[h]]=billMatS2[[h]]=billMatS3[[h]]=array(0,dim(ysub)[2])#matrix(NA,dim(ysub)[1],dim(ysub)[2])       
	the=as.numeric(theta[h,])
	alp=as.numeric(alpha[h,])
	bet=as.numeric(beta[h,])   
	
	ym0=ym1=ym2=ym3=ym4=ym5=ym6=matrix(NA,dim(ysub)[1],dim(ysub)[2])          
	
	for(u in 1:length(th)){      
	  	     		
		mu0=(pnorm(bet*the[u]-alp))
		y0=mu0
		y0[which(mu0>.5)]=1
		y0[which(mu0<=.5)]=0
		ym0[u,]=y0
			
		mu1=(pnorm(bet*(the[u]+ss1)-alp))
		y1=mu1
		y1[which(mu1>.5)]=1
		y1[which(mu1<=.5)]=0     
	 	ym1[u,]=y1              
	                                  
		mu2=(pnorm(bet*(the[u]+ss2)-alp))
		y2=mu2
		y2[which(mu2>.5)]=1
		y2[which(mu2<=.5)]=0   
	    ym2[u,]=y2
	
		mu3=(pnorm(bet*(the[u]+ss3)-alp))
		y3=mu3
		y3[which(mu3>.5)]=1
		y3[which(mu3<=.5)]=0                    
		ym3[u,]=y3
		
		mu4=(pnorm(bet*(the[u]+s1[h])-alp))
		y4=mu4
		y4[which(mu4>.5)]=1
		y4[which(mu4<=.5)]=0   
		ym4[u,]=y4
		
		mu5=(pnorm(bet*(the[u]+s2[h])-alp))
		y5=mu5
		y5[which(mu5>.5)]=1
		y5[which(mu5<=.5)]=0 
		ym5[u,]=y5
		
		mu6=(pnorm(bet*(the[u]+s3[h])-alp))  
		y6=mu6
		y6[which(mu6>.5)]=1
		y6[which(mu6<=.5)]=0		
		ym6[u,]=y6

		prMatM1[[h]][u]=length(which(y0==1 & y1==0 | y0==0 & y1==1))
		prMatM2[[h]][u]=length(which(y0==1 & y2==0 | y0==0 & y2==1))
		prMatM3[[h]][u]=length(which(y0==1 & y3==0 | y0==0 & y3==1))
		prMatS1[[h]][u]=length(which(y0==1 & y4==0 | y0==0 & y4==1))
		prMatS2[[h]][u]=length(which(y0==1 & y5==0 | y0==0 & y5==1))
		prMatS3[[h]][u]=length(which(y0==1 & y6==0 | y0==0 & y6==1))
	}            
	
	billMatM1[[h]][which(colMeans(ym0)<=.5 & colMeans(ym1)>.5 | colMeans(ym0)>.5 & colMeans(ym1)<=.5)]=1
	billMatM2[[h]][which(colMeans(ym0)<=.5 & colMeans(ym2)>.5 | colMeans(ym0)>.5 & colMeans(ym2)<=.5)]=1
	billMatM3[[h]][which(colMeans(ym0)<=.5 & colMeans(ym3)>.5 | colMeans(ym0)>.5 & colMeans(ym3)<=.5)]=1
	billMatS1[[h]][which(colMeans(ym0)<=.5 & colMeans(ym4)>.5 | colMeans(ym0)>.5 & colMeans(ym4)<=.5)]=1
	billMatS2[[h]][which(colMeans(ym0)<=.5 & colMeans(ym5)>.5 | colMeans(ym0)>.5 & colMeans(ym5)<=.5)]=1
	billMatS3[[h]][which(colMeans(ym0)<=.5 & colMeans(ym6)>.5 | colMeans(ym0)>.5 & colMeans(ym6)<=.5)]=1				
}
             
                        
bsata1=bsata2=bsata3=bmata1=bmata2=bmata3=matrix(NA,p,length(a))
msata1=msata2=msata3=mmata1=mmata2=mmata3=matrix(NA,p,length(th))
for(j in 1:1000){
	bmata1[j,]=billMatM1[[j]]
	bmata2[j,]=billMatM2[[j]]
	bmata3[j,]=billMatM3[[j]]
	bsata1[j,]=billMatS1[[j]]
	bsata2[j,]=billMatS2[[j]]
	bsata3[j,]=billMatS3[[j]]  

	mmata1[j,]=prMatM1[[j]]
	mmata2[j,]=prMatM2[[j]]
	mmata3[j,]=prMatM3[[j]]
	msata1[j,]=prMatS1[[j]]
	msata2[j,]=prMatS2[[j]]
	msata3[j,]=prMatS3[[j]]

}
     
bs1[[k]]=bsata1
bs2[[k]]=bsata2
bs3[[k]]=bsata3
bm1[[k]]=bmata1
bm2[[k]]=bmata2
bm3[[k]]=bmata3
ms1[[k]]=msata1
ms2[[k]]=msata2
ms3[[k]]=msata3
mm1[[k]]=mmata1
mm2[[k]]=mmata2
mm3[[k]]=mmata3
           
rownames(rolls)[which(rownames(rolls)==constraints[k-base,2])]=as.matrix(constraints[k-base,1])
rownames(rolls)[which(rownames(rolls)==constraints[k-base,4])]=as.matrix(constraints[k-base,3])

cutpoint[[k]]=as.data.frame(matrix(NA,ncol(ysub),3))       
cutpoint[[k]][,3]=colVars(cbind(alpha/beta))
cutpoint[[k]][,2]=colMeans(alpha/beta)
cutpoint[[k]][,1]=colnames(ysub)       

ideal[[k]]=as.data.frame(matrix(NA,ncol(posterior2[,th]),3))
ideal[[k]][,3]=c(t((rowSums(((t(posterior2[,th])-(c(t(colMeans(posterior2[,th]))))))^2))/(nrow(posterior2[,th])-1)))
ideal[[k]][,2]=(c(t(colMeans(posterior2[,th]))))
ideal[[k]][,1]=rownames(rolls)
save(ideal,cutpoint,file='~/Dropbox/rainReplication/simulations/simruns/irt_ideal-4a_mod.Rdata')   

save(bs1,bs2,bs3,bm1,bm2,bm3,ms1,ms2,ms3,mm1,mm2,mm3,file='~/Dropbox/rainReplication/simulations/simruns/irt_pred-4a.Rdata')
}
   
NOMINATED=F
if(NOMINATED==T){ 
############
# wnominate

nominate=list()
rollD=rollData

library(pscl)
library(wnominate)

for(k in sets){
	
	rollD[[k]]=rollD[[k]][which(rollD[[k]][,7]!="LCV        "),]
	rollD[[k]]=rollD[[k]][-c(1),]
	rolls=as.data.frame(rollD[[k]][,8:ncol(rollD[[k]])])
	
	if(length(which(str_length(names(rolls))>=6))>0){
		rolls=rolls[,-c(which(str_length(names(rolls))>=6))]
	}
	
	cands=rownames(rolls)=rollD[[k]][,2] # icpsr ID as the roll call name, cand link this back to state and name if needed   [-c(1,nrow(rollData[[k]])),7]
	rownames(rolls)[which(rownames(rolls)==constraints[k-base,1])]=as.matrix(constraints[k-base,2])
	rownames(rolls)[which(rownames(rolls)==constraints[k-base,3])]=as.matrix(constraints[k-base,4])
	
	for(j in 1:ncol(rolls)){
		indx1=which(rolls[,j]==0 | rolls[,j]==7 | rolls[,j]==8 | rolls[,j]==9)
		if(length(indx1)>0){
			rolls[indx1,j]=NA
		}
		indx2=which(rolls[,j]>3)
		if(length(indx2)>0){
			rolls[indx2,j]=0
		}
		indx3=which(rolls[,j]>1)
		if(length(indx3)>0){
			rolls[indx3,j]=1
		}
		rm(indx1,indx2,indx3)
	}

	polarity=c(which(rownames(rolls)==(as.character(constraints[k-base,4]))))
	rolls=rollcall(rolls)

	posterior2=wnominate(rolls,dims=1,polarity=polarity,trials=100)
	
	nominate[[k]]=as.data.frame(matrix(NA,length(cands),3))
	nominate[[k]][,3]=posterior2$legislators$se1D
	nominate[[k]][,2]=posterior2$legislators$coord1D
	nominate[[k]][,1]=as.character(cands)
	save(nominate,file='~/Dropbox/rainReplication/nominate/irt_nominate-4.Rdata')
}

# END ideal point estimation 
}